#' Train a multi-input multi-output (MIMO) LSTM on a time-indexed data frame.
#'
#' Rows are ordered by `date_col`, NA rows are dropped, and the last
#' `val_split` fraction of rows is held out as a time-ordered validation set
#' (no shuffling across the split boundary). Inputs and outputs are robustly
#' scaled (median / IQR) with statistics fitted on the training split only,
#' so no information leaks from the validation period. Each row is fed to the
#' LSTM as one sequence of length 1.
#'
#' @param data Data frame containing `date_col`, `input_cols`, `output_cols`.
#' @param date_col Unquoted column name used to order rows in time.
#' @param input_cols Character vector of predictor column names.
#' @param output_cols Character vector of target column names.
#' @param val_split Fraction of trailing rows held out for validation.
#' @param epochs Maximum number of training epochs (must be >= 1).
#' @param patience Early-stopping patience, in epochs without improvement.
#' @param min_delta Minimum decrease in validation loss that counts as an
#'   improvement for early stopping.
#' @param batch_size Mini-batch size used during training.
#' @param lr Learning rate.
#' @param optimizer One of "adam" or "sgd".
#' @param hidden_size Number of LSTM hidden units.
#' @param num_layers Number of stacked LSTM layers.
#' @param activation Activation applied to the last hidden state before the
#'   linear head: "tanh", "relu", or "linear".
#' @param dropout Inter-layer LSTM dropout; only has an effect when
#'   `num_layers > 1`.
#' @param weight_decay L2 penalty passed to the optimizer.
#' @return A list with the fitted `model`, per-epoch `train_loss` and
#'   `val_loss` (trimmed if early stopping fired), the `scaler`
#'   (train-split medians/IQRs), the ordered pre-drop data (`data0`),
#'   `input_cols`, `output_cols`, and `date_col` as a string.
train_lstm_mimo <- function(data,
                            date_col,
                            input_cols,
                            output_cols,
                            val_split = 0.1,
                            epochs = 50,
                            patience = 10,
                            min_delta = 1e-4,
                            batch_size = 32,
                            lr = 1e-3,
                            optimizer = c("adam", "sgd"),
                            hidden_size = 50,
                            num_layers = 1,
                            activation = c("tanh", "relu", "linear"),
                            dropout = 0.0,
                            weight_decay = 0.0) {
  optimizer <- match.arg(optimizer)
  activation <- match.arg(activation)
  date_col <- rlang::ensym(date_col)
  stopifnot(epochs >= 1, batch_size >= 1, val_split > 0, val_split < 1)

  # 1) Order by time index; keep a copy before dropping NA rows
  data <- data %>% arrange(!!date_col)
  data0 <- data
  data <- data %>% drop_na()

  # 2) Time-ordered split, clamped so both sides get at least one row
  #    (the old floor(val_split * n) could yield n_val = 0, turning the
  #    validation index (n + 1):n into an invalid descending sequence).
  n <- nrow(data)
  stopifnot(n >= 2)
  n_val <- max(1L, min(n - 1L, as.integer(floor(val_split * n))))
  train_df <- data[seq_len(n - n_val), ]
  val_df <- data[(n - n_val + 1):n, ]

  # 3) Robust scaler (median / IQR) fitted on the training split only
  train_stat <- function(cols, fun) {
    vapply(cols, function(col) fun(train_df[[col]], na.rm = TRUE), numeric(1))
  }
  scaler <- list(
    input_median = train_stat(input_cols, median),
    input_iqr = train_stat(input_cols, IQR),
    output_median = train_stat(output_cols, median),
    output_iqr = train_stat(output_cols, IQR)
  )
  # Constant columns have IQR 0; fall back to 1 to avoid division by zero
  # (the centered values are all 0 anyway).
  scaler$input_iqr[scaler$input_iqr == 0] <- 1
  scaler$output_iqr[scaler$output_iqr == 0] <- 1

  # 4) Apply the train-split scaling to both partitions
  for (col in input_cols) {
    train_df[[col]] <- (train_df[[col]] - scaler$input_median[col]) / scaler$input_iqr[col]
    val_df[[col]] <- (val_df[[col]] - scaler$input_median[col]) / scaler$input_iqr[col]
  }
  for (col in output_cols) {
    train_df[[col]] <- (train_df[[col]] - scaler$output_median[col]) / scaler$output_iqr[col]
    val_df[[col]] <- (val_df[[col]] - scaler$output_median[col]) / scaler$output_iqr[col]
  }

  # 5) LSTM -> activation on last hidden state -> linear head
  LSTMModel <- nn_module(
    "LSTMModel",
    initialize = function(input_size, hidden_size, num_layers, dropout,
                          output_size, activation) {
      self$lstm <- nn_lstm(
        input_size = input_size,
        hidden_size = hidden_size,
        num_layers = num_layers,
        batch_first = TRUE,
        # torch warns that dropout has no effect with a single layer,
        # so only pass it through when it can actually apply
        dropout = if (num_layers > 1) dropout else 0
      )
      self$fc <- nn_linear(hidden_size, output_size)
      self$act <- switch(
        activation,
        tanh = nn_tanh(),
        relu = nn_relu(),
        linear = nn_identity()
      )
    },
    forward = function(x) {
      out <- self$lstm(x)
      # out[[1]] is (batch, seq, hidden); keep only the last time step
      h_last <- out[[1]][, dim(out[[1]])[2], ]
      self$fc(self$act(h_last))
    }
  )

  # 6) Tensors: each row becomes one length-1 sequence of the input features
  make_ds <- function(df) {
    x_mat <- as.matrix(df[, input_cols, drop = FALSE])
    y_mat <- as.matrix(df[, output_cols, drop = FALSE])
    list(
      x = torch_tensor(x_mat, dtype = torch_float())$view(
        c(nrow(x_mat), -1, length(input_cols))
      ),
      y = torch_tensor(y_mat, dtype = torch_float())
    )
  }
  train_ds <- make_ds(train_df)
  val_ds <- make_ds(val_df)

  # 7) Model, optimizer, loss
  model <- LSTMModel(
    input_size = length(input_cols),
    hidden_size = hidden_size,
    num_layers = num_layers,
    dropout = dropout,
    output_size = length(output_cols),
    activation = activation
  )
  optim <- switch(
    optimizer,
    adam = optim_adam(model$parameters, lr = lr, weight_decay = weight_decay),
    sgd = optim_sgd(model$parameters, lr = lr, weight_decay = weight_decay)
  )
  criterion <- nn_smooth_l1_loss()

  # 8) Mini-batch training loop with early stopping on validation loss.
  #    (The original took one full-batch gradient step per epoch and never
  #    used `batch_size`; batches are now shuffled each epoch.)
  train_loss <- numeric(epochs)
  val_loss <- numeric(epochs)
  best_loss <- Inf
  wait <- 0
  n_train <- nrow(train_df)
  batch_starts <- seq(1, n_train, by = batch_size)
  for (e in seq_len(epochs)) {
    model$train()
    perm <- sample.int(n_train)
    batch_losses <- numeric(length(batch_starts))
    for (b in seq_along(batch_starts)) {
      first <- batch_starts[b]
      rows <- perm[first:min(first + batch_size - 1, n_train)]
      # index_select keeps the batch dimension even for a single row
      idx <- torch_tensor(rows, dtype = torch_long())
      optim$zero_grad()
      preds <- model(train_ds$x$index_select(1, idx))
      loss <- criterion(preds, train_ds$y$index_select(1, idx))
      loss$backward()
      optim$step()
      batch_losses[b] <- loss$item()
    }
    # Unweighted mean over batches (last batch may be smaller)
    train_loss[e] <- mean(batch_losses)

    model$eval()
    with_no_grad({
      preds_val <- model(val_ds$x)
      val_loss[e] <- criterion(preds_val, val_ds$y)$item()
    })

    # Early stopping: reset patience only on a material improvement
    if (val_loss[e] < best_loss - min_delta) {
      best_loss <- val_loss[e]
      wait <- 0
    } else {
      wait <- wait + 1
      if (wait >= patience) {
        message("Stopping early at epoch ", e,
                " (no improvement for ", patience, " epochs).")
        break
      }
    }
  }

  # Trim loss histories if we stopped early
  train_loss <- train_loss[seq_len(e)]
  val_loss <- val_loss[seq_len(e)]

  list(
    model = model,
    train_loss = train_loss,
    val_loss = val_loss,
    scaler = scaler,
    data0 = data0,
    input_cols = input_cols,
    output_cols = output_cols,
    date_col = rlang::as_string(date_col)
  )
}